package com.cmge.ad.spider.video.youku;

import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;

import org.springframework.util.StringUtils;

import com.cmge.ad.util.HttpClientUtil;

/**
 * @desc	Youku data crawler.
 * 
 * 			Several developer accounts are registered because a single account
 * 			is limited to 1000 calls per hour.
 * 			The search API is used to obtain the IDs of matching videos; the
 * 			video content is then fetched by ID (returned in swf format).
 * 
 * @author	ljt
 * @time	2014-12-30 下午3:51:19
 */
public class YouKuCrawl {
	
	// Holds one client_id from each developer account; filled once in the static initializer
	private static final List<String> clientList = new ArrayList<String>();
	
	private static final String searchUrl = "https://openapi.youku.com/v2/searches/video/by_keyword.json?paid=0&public_type=all";
	
	// client_ids that already failed during the retry sequence running on the current thread
	private static final ThreadLocal<List<String>> retryThreadList = new ThreadLocal<List<String>>();
	
	// Shared generator used to pick client_ids, instead of creating a new Random per pick
	private static final Random RANDOM = new Random();
	
	static{
		clientList.add("668136f544d4c88c");
		clientList.add("8f73c8fb5bf55cba");
		clientList.add("1f2d57f9b5ea2ce9");
	}
	
	/**
	 * Searches for videos matching the given keyword.
	 * 
	 * A client_id is picked at random for the first attempt; when the response
	 * is empty the request is retried with the remaining client_ids.
	 * The raw response is printed to standard output.
	 * 
	 * @param key search keyword
	 */
	public static void search(String key){
		// Pick a random client_id for the first attempt
		String clientId = clientList.get(RANDOM.nextInt(clientList.size()));
		System.out.println("选中："+clientId);
		
		// At most one retry per remaining client_id; retry() also cleans up the thread-bound list
		String result = retry(searchUrl,clientId,key,clientList.size()-1);
		System.out.println("搜索结果："+ result);
		
		if(!StringUtils.isEmpty(result)){
			// TODO parse the result list and extract the video IDs
			
		}
	}
	
	/**
	 * Requests {@code url} with the given client_id and keyword, switching to
	 * another client_id and trying again while the response is empty.
	 * 
	 * The keyword is URL-encoded (UTF-8) before it is appended to the query
	 * string; a {@code null} keyword is sent as an empty value.
	 * Retrying stops when a non-empty response arrives, when {@code num}
	 * retries have been used, or when every configured client_id has been
	 * tried, whichever comes first.
	 * The thread-bound list of failed client_ids is reset before the first
	 * request and removed again when this method exits, including when the
	 * HTTP helper throws.
	 * 
	 * @param url      base URL; must already contain a query string because the
	 *                 parameters are appended with {@code &}
	 * @param clientId client_id used for the first attempt
	 * @param key      search keyword, not yet URL-encoded
	 * @param num      maximum number of retries after the first attempt
	 * @return the first non-empty response, otherwise the last (empty or
	 *         {@code null}) response received
	 */
	public static String retry(String url,String clientId,String key,int num){
		String keyword = encodeKeyword(key);
		// Discard entries a previous sequence may have left on this thread
		retryThreadList.remove();
		try{
			String currentId = clientId;
			String result = request(url,currentId,keyword);
			for(int remaining = num; StringUtils.isEmpty(result) && remaining >= 1; remaining--){
				List<String> failed = retryThreadList.get();
				if(failed == null){
					failed = new ArrayList<String>();
					retryThreadList.set(failed);
				}
				failed.add(currentId);
				
				String nextId = getAppId(currentId);
				if(nextId == null){
					// Every client_id has been tried already; nothing left to retry with
					break;
				}
				System.out.println("重试...");
				currentId = nextId;
				// Retry against the URL the caller passed in, not the class-level search URL
				result = request(url,currentId,keyword);
			}
			return result;
		}finally{
			retryThreadList.remove();
		}
	}
	
	
	/**
	 * Picks a random client_id that is neither {@code clientId} nor one of the
	 * client_ids recorded as failed on the current thread.
	 * 
	 * @param clientId client_id to exclude; may be {@code null} to exclude
	 *                 only the failed ones
	 * @return an eligible client_id, or {@code null} when none is left
	 */
	public static String getAppId(String clientId){
		List<String> failed = retryThreadList.get();
		List<String> candidates = new ArrayList<String>(clientList.size());
		for(String id : clientList){
			boolean alreadyFailed = failed != null && failed.contains(id);
			if(!alreadyFailed && !id.equals(clientId)){
				candidates.add(id);
			}
		}
		if(candidates.isEmpty()){
			return null;
		}
		return candidates.get(RANDOM.nextInt(candidates.size()));
	}
	
	/**
	 * Issues a single GET request with the client_id and keyword appended to the URL.
	 */
	private static String request(String url,String clientId,String encodedKeyword){
		return HttpClientUtil.get(url +
				"&client_id=" + clientId + 
				"&keyword=" + encodedKeyword);
	}
	
	/**
	 * URL-encodes the keyword as UTF-8 so it is safe inside a query string;
	 * {@code null} becomes the empty string.
	 */
	private static String encodeKeyword(String key){
		if(key == null){
			return "";
		}
		try{
			return URLEncoder.encode(key,"UTF-8");
		}catch(UnsupportedEncodingException e){
			// The Charset documentation lists UTF-8 as supported on every Java platform
			throw new IllegalStateException("UTF-8 encoding is not available", e);
		}
	}
	
	/**
	 * Manual smoke test: searches for "Java" once per second until the process is stopped.
	 */
	public static void main(String[] args) throws Exception {
		
		for(;;){
			YouKuCrawl.search("Java");
			Thread.sleep(1000);
		}
		
	}
	
}
